In [5]:
    
import numpy as np
from numpy import ma
from pyproj import Geod
from metpy.io.nexrad import Level2File
from metpy.plots import ctables
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import os, tarfile, wget, re
import boto3
from botocore.handlers import disable_signing
import matplotlib.pyplot as plt
%matplotlib inline
    
In [24]:
    
s3_client = boto3.client('s3')
resource = boto3.resource('s3')
# Disable signing for anonymous requests to public bucket
resource.meta.client.meta.events.register('choose-signer.s3.*', disable_signing)
nexrad_bucket = resource.Bucket('noaa-nexrad-level2')
paginator = s3_client.get_paginator('list_objects')
klot_95 = list()
for result in s3_client.list_objects(Bucket='noaa-nexrad-level2', Prefix='1995')['Contents']:
    #print(result)
    #print(type(result['Key']))
#     if (result['Key'].find('KLOT') != -1) and (result['Key'].endswith('.gz')):
#     #if (result['Key'].endswith('.gz')):
#         klot_95.append(result['Key'])
# print(klot_95[:5])
    
    
In [14]:
    
s3_client = boto3.client('s3')
resource = boto3.resource('s3')
# Disable signing for anonymous requests to public bucket
resource.meta.client.meta.events.register('choose-signer.s3.*', disable_signing)
nexrad_bucket = resource.Bucket('noaa-nexrad-level2')
local_filepath = 'nexrad_data/KLOT20110723_074851_V03.gz'
nexrad_bucket.download_file('2011/07/23/KLOT/KLOT20110723_074851_V03.gz', local_filepath)
f = Level2File(local_filepath)
f.sweeps[0][0]
    
    Out[14]:
In [4]:
    
local_filepath = 'nexrad_data/KLOT20110723_074851_V03.gz'
f = Level2File(local_filepath)
# Pull data out of the file
sweep = 0
# First item in ray is header, which has azimuth angle
az = np.array([ray[0].az_angle for ray in f.sweeps[sweep]])
# 5th item is a dict mapping a var name (byte string) to a tuple
# of (header, data array)
ref_hdr = f.sweeps[sweep][0][4][b'REF'][0]
ref_range = np.arange(ref_hdr.num_gates) * ref_hdr.gate_width + ref_hdr.first_gate
ref = np.array([ray[4][b'REF'][1] for ray in f.sweeps[sweep]])
    
In [5]:
    
# fig, axes = plt.subplots(1, 2, figsize=(15, 8))
# for var_data, var_range, ax in zip((ref, rho), (ref_range, rho_range), axes):
# Turn into an array, then mask
data = ma.array(ref)
data[np.isnan(data)] = ma.masked
# Convert az,range to x,y
xlocs = ref_range * np.sin(np.deg2rad(az[:, np.newaxis]))
ylocs = ref_range * np.cos(np.deg2rad(az[:, np.newaxis]))
# Plot the data
cmap = ctables.registry.get_colortable('viridis')
plt.pcolormesh(xlocs, ylocs, data, cmap=cmap)
#plt.set_aspect('equal', 'datalim')
plt.show()
    
    
In [54]:
    
def extract_data(filepath):
    f = Level2File(filepath)
    # Pull data out of the file
    sweep = 0
    # First item in ray is header, which has azimuth angle
    az = np.array([ray[0].az_angle for ray in f.sweeps[sweep]])
    # 5th item is a dict mapping a var name (byte string) to a tuple
    # of (header, data array)
    ref_hdr = f.sweeps[sweep][0][4][b'REF'][0]
    ref_range = np.arange(ref_hdr.num_gates) * ref_hdr.gate_width + ref_hdr.first_gate
    ref = np.array([ray[4][b'REF'][1] for ray in f.sweeps[sweep]])
    
    data_hdr = f.sweeps[sweep][0][1]
    
    data = ma.array(ref)
    data[data==0] = ma.masked
    # Data from MetPy needs to be converted to latitude and longitude coordinates
    g = Geod(ellps='clrk66')
    center_lat = np.ones([len(az),len(ref_range)])*data_hdr.lat
    center_lon = np.ones([len(az),len(ref_range)])*data_hdr.lon
    az2D = np.ones_like(center_lat)*az[:,None]
    rng2D = np.ones_like(center_lat)*np.transpose(ref_range[:,None])*1000
    lon,lat,back = g.fwd(center_lon,center_lat,az2D,rng2D)
    
    return lon, lat, data
def unstack_process(lon, lat, data):
    lat_df = pd.DataFrame(lat)
    lon_df = pd.DataFrame(lon)
    data_df = pd.DataFrame(data)
    
    lon_stack = lon_df.stack().reset_index()
    lon_stack = lon_stack.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'lon'})
    lat_stack = lat_df.stack().reset_index()
    lat_stack = lat_stack.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'lat'})
    coord_merge = pd.merge(lat_stack, lon_stack, on=['x', 'y']).reset_index()
    # Reducing to bounding box through selection rather than geospatial operation
    coord_merge = coord_merge.loc[(coord_merge['lat'] <= 42.0231311) &
                                  (coord_merge['lat'] >= 41.644335) &
                                  (coord_merge['lon'] <= -87.524044) &
                                  (coord_merge['lon'] >= -87.940267)]
    data_stack = data_df.stack().reset_index()
    data_stack = data_stack.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'precip'})
    merged_data = pd.merge(coord_merge, data_stack, on=['x', 'y'], how='left')[['lat','lon','precip']]
    nexrad_df = merged_data.dropna().copy()
    # Convert precip in dBZ into mm/hr using Marshall-Palmer https://en.wikipedia.org/wiki/DBZ_(meteorology)
    nexrad_df.loc['precip'] = pow(pow(10, nexrad_df['precip']/10)/200, 0.625)
    return nexrad_df.dropna()
    
def spatial_join(nexrad_df, gpd_file, group_col, file_time):
    geo_df = gpd.read_file(gpd_file)
    crs = {'init':'epsg:4326'}
    geo_df.crs = crs
    geometry = nexrad_df.apply(lambda z: Point(z['lon'], z['lat']), axis=1).dropna()
    #geometry = [Point(xy) for xy in zip(nexrad_df.lon, nexrad_df.lat)]
    nexrad_geo = gpd.GeoDataFrame(nexrad_df, crs=crs, geometry=geometry)
    nexrad_geo.crs = geo_df.crs
    merged_nexrad = gpd.tools.sjoin(nexrad_geo, geo_df, how='right', op='within') #.reset_index()
    nexrad_grouped = merged_nexrad.groupby([group_col])['precip'].mean().reset_index()
    nexrad_grouped[group_col] = nexrad_grouped[group_col].astype(int)
    nexrad_grouped.fillna(value=0, inplace=True)
    nexrad_grouped.sort_values(by=group_col, inplace=True)
    nexrad_grouped.to_csv('data/nexrad_processed/{}_{}.csv'.format(group_col, file_time), index=False)
    
In [57]:
    
file_time = re.search(r'\d{8}_\d{6}',local_filepath).group()
lon, lat, data = extract_data(local_filepath)
print(data.shape)
nexrad_df = unstack_process(lon, lat, data)
spatial_join(nexrad_df, 'data/chicago_wards.geojson', 'ward', file_time)
    
    
In [52]:
    
geo_df.info(verbose=True)
    
    
In [53]:
    
nexrad_df.info(verbose=True)
    
    
In [ ]: